######################################
# Dataset Construction ###############
######################################
# Set working directory
# NOTE(review): this is a hard-coded network path; the relative CSV file names
# read further down only resolve when the script runs against this folder.
setwd("//adsroot.itcs.umich.edu/home/hursre/Documents/Papers/Evangelical Signalling/Data")

# Load the library I'll need later
library(stringr)
library(dplyr)
#library(readtext)
library(foreign)

#########################################################################################
# Opening candidate information files - txt files listing every candidate by state and year
# were downloaded from the Brazilian TSE website and then appended into csvs
########################################################################################
# Two-digit suffixes of the election years. The file "20<yy>.csv" is read into
# a data frame named "c<yy>" (c00, c02, ..., c16) in the current environment.
range <- c("00", "02", "04", "06", "08", "10", "12", "14", "16")
for (year in range) {
  nam <- paste0("c", year)
  assign(nam, read.csv(sprintf("20%s.csv", year), header = TRUE))
}

##########################
# Importing Raw Variables#
##########################
# Year #
# Stack column X3 of every yearly file, oldest election first.
year <- do.call(
  c,
  lapply(list(c00, c02, c04, c06, c08, c10, c12, c14, c16), function(d) d$X3)
)
length(year)
table(year)

# Round #
# Same stacking for column X4.
round <- do.call(
  c,
  lapply(list(c00, c02, c04, c06, c08, c10, c12, c14, c16), function(d) d$X4)
)

# State #
# Column X6 is stored as a character column `state` in each yearly data
# frame; the yearly columns are then stacked in chronological order.
c00$state <- as.character(c00$X6)
c02$state <- as.character(c02$X6)
c04$state <- as.character(c04$X6)
c06$state <- as.character(c06$X6)
c08$state <- as.character(c08$X6)
c10$state <- as.character(c10$X6)
c12$state <- as.character(c12$X6)
c14$state <- as.character(c14$X6)
c16$state <- as.character(c16$X6)

state <- c(
  c00$state, c02$state, c04$state,
  c06$state, c08$state, c10$state,
  c12$state, c14$state, c16$state
)

# LOCATION (MUNICIPALITY OR STATE) CODE
# Column X7 of every yearly file, stacked in chronological order.
lcode <- do.call(
  c,
  lapply(list(c00, c02, c04, c06, c08, c10, c12, c14, c16), function(d) d$X7)
)


# MUNICIPALITY NAME
# Column X8 of every yearly file, stored as character and stacked by year.
c00$municipality <- as.character(c00$X8)
c02$municipality <- as.character(c02$X8)
c04$municipality <- as.character(c04$X8)
c06$municipality <- as.character(c06$X8)
c08$municipality <- as.character(c08$X8)
c10$municipality <- as.character(c10$X8)
c12$municipality <- as.character(c12$X8)
c14$municipality <- as.character(c14$X8)
c16$municipality <- as.character(c16$X8)

municipality <- c(
  c00$municipality, c02$municipality, c04$municipality,
  c06$municipality, c08$municipality, c10$municipality,
  c12$municipality, c14$municipality, c16$municipality
)

## Get rid of accents in order to uniquely identify municipalities
# The accented letters in the former gsub() patterns had been corrupted by a
# file-encoding mix-up (every pattern had become the same replacement
# character), so no accent was actually being normalised. The letters are
# written as \u escapes here so the script no longer depends on the encoding
# it is saved in. As before, every accented letter - upper OR lower case -
# maps to its unaccented UPPER-case letter (A, E, I, O, U, C).
municipality <- chartr(
  paste0("\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
         "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  municipality
)

# CARGOCODE
# Column X9 of every yearly file, stacked in chronological order.
cargocode <- do.call(
  c,
  lapply(list(c00, c02, c04, c06, c08, c10, c12, c14, c16), function(d) d$X9)
)

# CARGO
# Column X10 as a character column `cargo` in each yearly data frame,
# then stacked in chronological order.
c00$cargo <- as.character(c00$X10)
c02$cargo <- as.character(c02$X10)
c04$cargo <- as.character(c04$X10)
c06$cargo <- as.character(c06$X10)
c08$cargo <- as.character(c08$X10)
c10$cargo <- as.character(c10$X10)
c12$cargo <- as.character(c12$X10)
c14$cargo <- as.character(c14$X10)
c16$cargo <- as.character(c16$X10)

cargo <- c(
  c00$cargo, c02$cargo, c04$cargo,
  c06$cargo, c08$cargo, c10$cargo,
  c12$cargo, c14$cargo, c16$cargo
)


# CANDIDATE NAME
# Column X11 of every yearly file, stored as character and stacked by year.
c00$name <- as.character(c00$X11)
c02$name <- as.character(c02$X11)
c04$name <- as.character(c04$X11)
c06$name <- as.character(c06$X11)
c08$name <- as.character(c08$X11)
c10$name <- as.character(c10$X11)
c12$name <- as.character(c12$X11)
c14$name <- as.character(c14$X11)
c16$name <- as.character(c16$X11)

name <- c(
  c00$name, c02$name, c04$name,
  c06$name, c08$name, c10$name,
  c12$name, c14$name, c16$name
)

## Getting rid of accents on names
# The accented letters in the former gsub() patterns had been corrupted by a
# file-encoding mix-up (every pattern had become the same replacement
# character), so no accent was actually being normalised. The letters are
# written as \u escapes here so the script no longer depends on the encoding
# it is saved in. As before, every accented letter - upper OR lower case -
# maps to its unaccented UPPER-case letter (A, E, I, O, U, C).
name <- chartr(
  paste0("\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
         "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  name
)

# BALLOT NAME
# Read from column X14 for 2000-2010 and from X15 for 2012, 2014 and 2016.
c00$ballot <- as.character(c00$X14)
c02$ballot <- as.character(c02$X14)
c04$ballot <- as.character(c04$X14)
c06$ballot <- as.character(c06$X14)
c08$ballot <- as.character(c08$X14)
c10$ballot <- as.character(c10$X14)
c12$ballot <- as.character(c12$X15)
c14$ballot <- as.character(c14$X15)
c16$ballot <- as.character(c16$X15)

ballot <- c(
  c00$ballot, c02$ballot, c04$ballot,
  c06$ballot, c08$ballot, c10$ballot,
  c12$ballot, c14$ballot, c16$ballot
)

## Getting rid of accents on ballot names
# The accented letters in the former gsub() patterns had been corrupted by a
# file-encoding mix-up (every pattern had become the same replacement
# character), so no accent was actually being normalised. The letters are
# written as \u escapes here so the script no longer depends on the encoding
# it is saved in. As before, every accented letter - upper OR lower case -
# maps to its unaccented UPPER-case letter (A, E, I, O, U, C).
ballot <- chartr(
  paste0("\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
         "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  ballot
)


# PARTY NAME
# Read from column X18 for 2000-2010 and from X19 for 2012, 2014 and 2016.
c00$party <- as.character(c00$X18)
c02$party <- as.character(c02$X18)
c04$party <- as.character(c04$X18)
c06$party <- as.character(c06$X18)
c08$party <- as.character(c08$X18)
c10$party <- as.character(c10$X18)
c12$party <- as.character(c12$X19)
c14$party <- as.character(c14$X19)
c16$party <- as.character(c16$X19)
party <- c(
  c00$party, c02$party, c04$party,
  c06$party, c08$party, c10$party,
  c12$party, c14$party, c16$party
)

## Getting rid of accents on party names
# The accented letters in the former gsub() patterns had been corrupted by a
# file-encoding mix-up (every pattern had become the same replacement
# character), so no accent was actually being normalised. The letters are
# written as \u escapes here so the script no longer depends on the encoding
# it is saved in. As before, every accented letter - upper OR lower case -
# maps to its unaccented UPPER-case letter (A, E, I, O, U, C).
party <- chartr(
  paste0("\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
         "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  party
)


# Coalition members
# Read from column X22 for 2000-2010 and from X23 for 2012, 2014 and 2016.
c00$coalmem <- as.character(c00$X22)
c02$coalmem <- as.character(c02$X22)
c04$coalmem <- as.character(c04$X22)
c06$coalmem <- as.character(c06$X22)
c08$coalmem <- as.character(c08$X22)
c10$coalmem <- as.character(c10$X22)
c12$coalmem <- as.character(c12$X23)
c14$coalmem <- as.character(c14$X23)
c16$coalmem <- as.character(c16$X23)

coalmem <- do.call(c, list(
  c00$coalmem, c02$coalmem, c04$coalmem,
  c06$coalmem, c08$coalmem, c10$coalmem,
  c12$coalmem, c14$coalmem, c16$coalmem
))


# Coalition name
# Read from column X23 for 2000-2010 and from X24 for 2012, 2014 and 2016.
c00$coal <- as.character(c00$X23)
c02$coal <- as.character(c02$X23)
c04$coal <- as.character(c04$X23)
c06$coal <- as.character(c06$X23)
c08$coal <- as.character(c08$X23)
c10$coal <- as.character(c10$X23)
c12$coal <- as.character(c12$X24)
c14$coal <- as.character(c14$X24)
c16$coal <- as.character(c16$X24)

coal <- do.call(c, list(
  c00$coal, c02$coal, c04$coal,
  c06$coal, c08$coal, c10$coal,
  c12$coal, c14$coal, c16$coal
))


# JOB CODE
# Read from column X24 for 2000-2010 and from X25 for 2012, 2014 and 2016.
# The values are copied as they are (no character conversion).
c00$jobcode <- c00$X24
c02$jobcode <- c02$X24
c04$jobcode <- c04$X24
c06$jobcode <- c06$X24
c08$jobcode <- c08$X24
c10$jobcode <- c10$X24
c12$jobcode <- c12$X25
c14$jobcode <- c14$X25
c16$jobcode <- c16$X25

jobcode <- do.call(c, list(
  c00$jobcode, c02$jobcode, c04$jobcode,
  c06$jobcode, c08$jobcode, c10$jobcode,
  c12$jobcode, c14$jobcode, c16$jobcode
))
length(jobcode)


# JOB NAME
# Read from column X25 for 2000-2010 and from X26 for 2014 and 2016;
# 2012 needs the row-by-row repair below.
c00$jobname <- as.character(c00$X25)
c02$jobname <- as.character(c02$X25)
c04$jobname <- as.character(c04$X25)
c06$jobname <- as.character(c06$X25)
c08$jobname <- as.character(c08$X25)
c10$jobname <- as.character(c10$X25)

## Fixing some idiosyncrasies in the 2012 data
# For rows where X25 contains a digit the job name is taken from X26;
# for all other rows X25 itself is used as the job name.
fix <- data.frame(c12$X25,c12$X26)
fix$c12.X25 <- as.character(fix$c12.X25)
fix$c12.X26 <- as.character(fix$c12.X26)
head(fix)
fix$code<-grepl("[[:digit:]]", fix$c12.X25)
fix$jobname <- rep(0, length(fix$code))
fix$jobname[fix$code==TRUE] <- fix$c12.X26[fix$code==TRUE]
fix$jobname[fix$code==FALSE] <- fix$c12.X25[fix$code==FALSE]

c12$jobname <- as.character(fix$jobname)

c14$jobname <- as.character(c14$X26)
c16$jobname <- as.character(c16$X26)

jobname <- c(
  c00$jobname, c02$jobname, c04$jobname,
  c06$jobname, c08$jobname, c10$jobname,
  c12$jobname, c14$jobname, c16$jobname
)

## Getting rid of accents in job names
# The accented letters in the former gsub() patterns had been corrupted by a
# file-encoding mix-up (every pattern had become the same replacement
# character), so no accent was actually being normalised. The letters are
# written as \u escapes here so the script no longer depends on the encoding
# it is saved in. As before, every accented letter - upper OR lower case -
# maps to its unaccented UPPER-case letter (A, E, I, O, U, C).
jobname <- chartr(
  paste0("\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
         "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  jobname
)

# SEX
# Read from column X30 for 2000-2010 and from X31 for 2012, 2014 and 2016.
c00$sex <- as.character(c00$X30)
c02$sex <- as.character(c02$X30)
c04$sex <- as.character(c04$X30)
c06$sex <- as.character(c06$X30)
c08$sex <- as.character(c08$X30)
c10$sex <- as.character(c10$X30)
c12$sex <- as.character(c12$X31)
c14$sex <- as.character(c14$X31)
c16$sex <- as.character(c16$X31)

sex <- do.call(c, list(
  c00$sex, c02$sex, c04$sex,
  c06$sex, c08$sex, c10$sex,
  c12$sex, c14$sex, c16$sex
))
length(sex)
table(sex) # Note there are some miscodings from the raw data here

# EDUCATION NAME
# Read from column X32 for 2000-2010 and from X33 for 2012, 2014 and 2016.
c00$eduname <- as.character(c00$X32)
c02$eduname <- as.character(c02$X32)
c04$eduname <- as.character(c04$X32)
c06$eduname <- as.character(c06$X32)
c08$eduname <- as.character(c08$X32)
c10$eduname <- as.character(c10$X32)
c12$eduname <- as.character(c12$X33)
c14$eduname <- as.character(c14$X33)
c16$eduname <- as.character(c16$X33)

eduname <- do.call(c, list(
  c00$eduname, c02$eduname, c04$eduname,
  c06$eduname, c08$eduname, c10$eduname,
  c12$eduname, c14$eduname, c16$eduname
))


## BIRTHDAY - Inconsistency in formatting in the raw data means this needs quite a bit of attention to make it all consistent
# Fixing 2000, 2002, 2004
# A "/" is inserted four characters from the end and then seven characters
# from the end (presumably turning ddmmyyyy into dd/mm/yyyy - confirm with
# the head() output).
head(c00$X26, n=35) #OK
str_sub(c00$X26, start = -4, end = 0) <- "/"
str_sub(c00$X26, start = -7, end = 0) <- "/"
head(c00$X26, n=35) #OK

head(c02$X26, n=35) #OK
str_sub(c02$X26, start = -4, end = 0) <- "/"
str_sub(c02$X26, start = -7, end = 0) <- "/"
head(c02$X26, n=35) #OK

head(c04$X26, n=35) #OK
str_sub(c04$X26, start = -4, end = 0) <- "/"
str_sub(c04$X26, start = -7, end = 0) <- "/"
head(c04$X26, n=35) #OK

# Add zeros to the start
# Nine-character dates get a leading "0". which() drops missing birthdays
# from the index for every year (previously only 2000 was guarded), because
# an NA in a logical index makes the subscripted assignment fail.
short00 <- which(nchar(c00$X26) == 9)
str_sub(c00$X26[short00], start = 1, end = 0) <- "0"
head(c00$X26)

short02 <- which(nchar(c02$X26) == 9)
str_sub(c02$X26[short02], start = 1, end = 0) <- "0"
head(c02$X26)

short04 <- which(nchar(c04$X26) == 9)
str_sub(c04$X26[short04], start = 1, end = 0) <- "0"
head(c04$X26)


# Fixing 2008 and 2010
# Dashes become slashes and English three-letter month abbreviations become
# two-digit month numbers ("Jan" -> "01", ..., "Dec" -> "12").
c08$X26 <- gsub("-", "/", c08$X26)
c10$X26 <- gsub("-", "/", c10$X26)

for (m in seq_along(month.abb)) {
  month_number <- sprintf("%02d", m)
  c08$X26 <- gsub(month.abb[m], month_number, c08$X26)
  c10$X26 <- gsub(month.abb[m], month_number, c10$X26)
}

# Then Add 19s to the start
# "19" is inserted two characters from the end, i.e. in front of the
# two-digit year (assumes every birth year is 19xx).
head(c08$X26)
str_sub(c08$X26, start = -2, end = 0) <- "19"
head(c08$X26)

head(c10$X26)
str_sub(c10$X26, start = -2, end = 0) <- "19"
head(c10$X26)

# Then Add 0's the start where necessary
# which() keeps missing birthdays out of the index; an NA in a logical index
# makes the subscripted assignment fail.
short08 <- which(nchar(c08$X26) == 9)
str_sub(c08$X26[short08], start = 1, end = 0) <- "0"
head(c08$X26)

short10 <- which(nchar(c10$X26) == 9)
str_sub(c10$X26[short10], start = 1, end = 0) <- "0"
head(c10$X26)

## Fixing 2006, 2012, 2014
# Making sure there are two digits for all months
# The helper column (X26a / X27a) holds the digits left over from the
# deparsed gregexpr() result; "24" presumably means the slashes sit at
# positions 2 and 4 (d/m/yyyy), in which case a "0" is inserted six
# characters from the end, in front of the month.
# NOTE(review): a two-digit day with a one-digit month (slashes at 3 and 5)
# is not matched by the == 24 rule and is 9 characters long, so the
# day-padding step below would prepend a zero to it - confirm such rows do
# not occur in these files.
head(c06$X26)
c06$X26 <- as.character(c06$X26) 
c06$X26a<-as.character(gregexpr(pattern ='/',c06$X26))
c06$X26a<-gsub("[^0-9.]", "", c06$X26a) 
str_sub(c06$X26[c06$X26a==24], start = -6, end = 0) <- "0"
head(c06$X26)

head(c12$X27) #New format - similar to first three
c12$X27 <- as.character(c12$X27) 
c12$X27a<-as.character(gregexpr(pattern ='/',c12$X27))
c12$X27a<-gsub("[^0-9.]", "", c12$X27a) 
str_sub(c12$X27[c12$X27a==24], start = -6, end = 0) <- "0"
head(c12$X27)

head(c14$X27) #New format - similar to first three
c14$X27 <- as.character(c14$X27) 
c14$X27a<-as.character(gregexpr(pattern ='/',c14$X27))
c14$X27a<-gsub("[^0-9.]", "", c14$X27a) 
str_sub(c14$X27[c14$X27a==24], start = -6, end = 0) <- "0"
head(c14$X27)

# Making sure there are two digits for each day
# which() keeps missing birthdays out of the index; an NA in a logical index
# makes the subscripted assignment fail.
short06 <- which(nchar(c06$X26) == 9)
str_sub(c06$X26[short06], start = 1, end = 0) <- "0"
head(c06$X26)

short12 <- which(nchar(c12$X27) == 9)
str_sub(c12$X27[short12], start = 1, end = 0) <- "0"
head(c12$X27)

short14 <- which(nchar(c14$X27) == 9)
str_sub(c14$X27[short14], start = 1, end = 0) <- "0"
head(c14$X27)

# 2016 only needs converting to character; then stack all years
# (X26 for 2000-2010, X27 for 2012-2016).
c16$X27 <- as.character(c16$X27)

birthday <- do.call(c, list(
  c00$X26, c02$X26, c04$X26,
  c06$X26, c08$X26, c10$X26,
  c12$X27, c14$X27, c16$X27
))
head(birthday, n = 100)

## BIRTH STATE
# Read from X37 for 2000-2010, X38 for 2012 and X40 for 2014 and 2016.
birthstate <- do.call(c, lapply(
  list(c00$X37, c02$X37, c04$X37, c06$X37, c08$X37,
       c10$X37, c12$X38, c14$X40, c16$X40),
  as.character
))
table(birthstate)

## BIRTH MUNICIPALITY
# Read from X39 for 2000-2010, X40 for 2012 and X42 for 2014 and 2016.
birthmuni <- c(as.character(c00$X39), as.character(c02$X39), as.character(c04$X39),
               as.character(c06$X39), as.character(c08$X39), as.character(c10$X39),
               as.character(c12$X40), as.character(c14$X42), as.character(c16$X42))

## Getting rid of accents in birth municipalities
# The accented letters in the former gsub() patterns had been corrupted by a
# file-encoding mix-up (every pattern had become the same replacement
# character), so no accent was actually being normalised. The letters are
# written as \u escapes here so the script no longer depends on the encoding
# it is saved in. As before, every accented letter - upper OR lower case -
# maps to its unaccented UPPER-case letter (A, E, I, O, U, C).
birthmuni <- chartr(
  paste0("\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
         "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  birthmuni
)

length(birthmuni)

# Result - did they win or lose?
# Read from X42 for 2000-2010, X43 for 2012 and X45 for 2014 and 2016.
result <- c(as.character(c00$X42), as.character(c02$X42), as.character(c04$X42),
            as.character(c06$X42), as.character(c08$X42), as.character(c10$X42),
            as.character(c12$X43), as.character(c14$X45), as.character(c16$X45))

## Getting rid of accents in results
# The accented letters in the former gsub() patterns had been corrupted by a
# file-encoding mix-up (every pattern had become the same replacement
# character), so no accent was actually being normalised. The letters are
# written as \u escapes here so the script no longer depends on the encoding
# it is saved in. As before, every accented letter - upper OR lower case -
# maps to its unaccented UPPER-case letter (A, E, I, O, U, C).
result <- chartr(
  paste0("\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
         "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  result
)

# Making all these now-cleaned variables into a single master dataframe
df <- data.frame(
  year, state, round, lcode, municipality, cargocode, cargo, name, ballot,
  party, coalmem, coal, jobcode, jobname, sex, eduname, birthmuni,
  birthstate, birthday, result
)

# Check the classes and see that some are not the class that they should be
sapply(df, class) # What needs to be changed?
# Force every text variable to character; round, lcode, cargocode and jobcode
# are left as they are.
text_columns <- c(
  "year", "state", "municipality", "ballot", "party", "jobname", "sex",
  "eduname", "birthmuni", "birthstate", "birthday", "result", "coalmem",
  "coal", "name", "cargo"
)
df[text_columns] <- lapply(df[text_columns], as.character)
sapply(df, class) #Check that I was successful in changing those things


####################################
# Cleaning of Master Raw Dataframe #
####################################
# Changing to character where necessary
for (column in c("year", "state", "municipality", "ballot", "party",
                 "jobname", "sex", "eduname", "birthmuni", "birthstate",
                 "birthday", "result", "coalmem", "coal", "name", "cargo")) {
  df[[column]] <- as.character(df[[column]])
}
sapply(df, class) #Successful in changing those things?

# Fixing sex
# Start again from the raw stacked vector, translate the two Portuguese
# labels, and give every other non-missing value a single miscoding code.
df$sex <- sex
df$sex <- replace(df$sex, df$sex == "FEMININO", "FEMALE")
df$sex <- replace(df$sex, df$sex == "MASCULINO", "MALE")
miscoded_sex <- !is.na(df$sex) & !(df$sex %in% c("MALE", "FEMALE"))
df$sex[miscoded_sex] <- "999" # single code for miscoded sex
table(df$sex) # Just under 30k miscodings

# Fixing miscoded birthstate
# Empty, blank, "-1" and "BRASILEIRA NATA" entries all become "999".
table(df$birthstate)
invalid_birthstate <- c("", "-1", "BRASILEIRA NATA", " ")
df$birthstate[df$birthstate %in% invalid_birthstate] <- "999"
table(df$birthstate)

# Include DF deputies with state deputies
# Office code 8 is folded into code 7, and the matching office label
# "DEPUTADO DISTRITAL" into "DEPUTADO ESTADUAL".
df$cargocode <- replace(df$cargocode, df$cargocode == 8, 7)
df$cargo <- replace(df$cargo, df$cargo == "DEPUTADO DISTRITAL", "DEPUTADO ESTADUAL")

# Changing party names written out as whole names to make everything consistent across candidates
# Lookup table: full party name -> abbreviation. "PRESTCONTAS" maps to the
# empty string, which the miscoding step below then turns into "999".
party_abbrev <- c(
  "DEMOCRATAS" = "DEM",
  "PARTIDO COMUNISTA BRASILEIRO" = "PCB",
  "PARTIDO COMUNISTA DO BRASIL" = "PC do B",
  "PARTIDO DA MOBILIZACAO NACIONAL" = "PMN",
  "PARTIDO DA REPUBLICA" = "PR",
  "PARTIDO DA SOCIAL DEMOCRACIA BRASILEIRA" = "PSDB",
  "PARTIDO DEMOCRATICO TRABALHISTA" = "PDT",
  "PARTIDO DO MOVIMENTO DEMOCRATICO BRASILEIRO" = "PMDB",
  "PARTIDO DOS TRABALHADORES" = "PT",
  "PARTIDO HUMANISTA DA SOLIDARIEDADE" = "PHS",
  "PARTIDO PATRIA LIVRE" = "PPL",
  "PARTIDO POPULAR SOCIALISTA" = "PPS",
  "PARTIDO PROGRESSISTA" = "PP",
  "PARTIDO RENOVADOR TRABALHISTA BRASILEIRO" = "PRTB",
  "PARTIDO REPUBLICANO BRASILEIRO" = "PRB",
  "PARTIDO REPUBLICANO PROGRESSISTA" = "PRP",
  "PARTIDO SOCIAL CRISTAO" = "PSC",
  "PARTIDO SOCIAL DEMOCRATA CRISTAO" = "PSDC",
  "PARTIDO SOCIAL DEMOCRATICO" = "PSD",
  "PARTIDO SOCIAL LIBERAL" = "PSL",
  "PARTIDO SOCIAL TRABALHISTA" = "PST",
  "PARTIDO SOCIALISMO E LIBERDADE" = "PSOL",
  "PARTIDO SOCIALISTA BRASILEIRO" = "PSB",
  "PARTIDO SOCIALISTA DOS TRABALHADORES UNIFICADO" = "PSTU",
  "PARTIDO TRABALHISTA BRASILEIRO" = "PTB",
  "PARTIDO TRABALHISTA CRISTAO" = "PTC",
  "PARTIDO TRABALHISTA DO BRASIL" = "PT do B",
  "PARTIDO TRABALHISTA NACIONAL" = "PTN",
  "PARTIDO VERDE" = "PV",
  "PARTIDO PROGRESSISTA BRASILEIRO" = "PP",
  "PARTIDO DA CAUSA OPERARIA" = "PCO",
  "PARTIDO DA FRENTE LIBERAL" = "PFL",
  "PARTIDO DA RECONSTRUCAO NACIONAL" = "PRN",
  "PARTIDO DA REEDIFICACAO DA ORDEM NACIONAL" = "PRONA",
  "PARTIDO DE REEDIFICACAO DA ORDEM NACIONAL" = "PRONA",
  "PARTIDO DOS APOSENTADOS DA NACAO" = "PAN",
  "PARTIDO GERAL DOS TRABALHADORES" = "PGT",
  "PARTIDO LIBERAL" = "PL",
  "PRESTCONTAS" = ""
)
party_hit <- match(df$party, names(party_abbrev))
has_abbrev <- !is.na(party_hit)
df$party[has_abbrev] <- unname(party_abbrev[party_hit[has_abbrev]])

# Identifying miscoded party names
df$party[df$party %in% c("", "20", "NAO", "SIM")] <- "999"

# Recoding education into four main groups to fix inconsistencies in how these are written
# The accented / ordinal characters in these labels had been corrupted by a
# file-encoding mix-up and could never match the data, so they are written as
# \u escapes: \u00ba = masculine ordinal indicator, \u00b0 = degree sign (both
# accepted after "1"/"2"), \u00ca = E-circumflex, \u00c9 = E-acute,
# \u00c3 = A-tilde. Codes are stored as text because eduname is a character
# column.
# NOTE(review): "1o/2o GRAU ..." labels are all placed in group 1 here, while
# "ENSINO FUNDAMENTAL COMPLETO" / "ENSINO MEDIO COMPLETO" go to groups 2 / 3;
# confirm that grouping is intended. It is kept exactly as it was.

# Less than middle school
df$eduname[df$eduname %in% c(
  "1\u00ba GRAU COMPLETO", "1\u00b0 GRAU COMPLETO",
  "1\u00ba GRAU INCOMPLETO", "1\u00b0 GRAU INCOMPLETO",
  "2\u00ba GRAU COMPLETO", "2\u00b0 GRAU COMPLETO",
  "2\u00ba GRAU INCOMPLETO", "2\u00b0 GRAU INCOMPLETO",
  "ANALFABETO",
  "ENSINO FUNDAMENTAL INCOMPLETO",
  "FUNDAMENTAL INCOMPLETO",
  "L\u00ca E ESCREVE"
)] <- "1"

# Middle School
df$eduname[df$eduname %in% c(
  "ENSINO FUNDAMENTAL COMPLETO",
  "FUNDAMENTAL COMPLETO",
  "ENSINO M\u00c9DIO INCOMPLETO",
  "M\u00c9DIO INCOMPLETO"
)] <- "2"

# High School
df$eduname[df$eduname %in% c(
  "ENSINO M\u00c9DIO COMPLETO",
  "M\u00c9DIO COMPLETO"
)] <- "3"

# Any College
df$eduname[df$eduname %in% c("SUPERIOR COMPLETO", "SUPERIOR INCOMPLETO")] <- "4"

# Fixing miscoded education variables
table(df$eduname)
df$eduname[df$eduname %in% c(
  "5", "7", "9", "N\u00c3O INFORMADO", "ELEITO", ""
)] <- "999"
table(df$eduname)

###############################################
# GENERATING NEW VARIABLES ####################
###############################################
####################
## Election Winners#
####################
table(df$result)
# win = 1 for any of the four "elected" result labels, 0 otherwise.
winning_results <- c(
  "ELEITO", "ELEITO POR QP", "ELEITO POR MEDIA",
  "ELEITO POR QUOCIENTE PARTIDARIO"
)
df$win <- as.numeric(df$result %in% winning_results)
table(df$win)
################################
###EVANGELICAL TITLES#############
################################
# Each dummy is TRUE when the ballot name matches the pattern; the count of
# matches is shown after each one.

# Prophet in title
df$prophet <- grepl("PROFETA", df$ballot)
sum(df$prophet)

# Pastor in title
df$pastor <- grepl("PASTOR", df$ballot)
sum(df$pastor)

# Bishop in title (pattern is anchored to the start of the ballot name)
df$bishop <- grepl("^BISP", df$ballot)
sum(df$bishop)

# Missionary (any variation) in title*
df$missionary <- grepl("MISSION", df$ballot)
sum(df$missionary)

# Presbyter in title
df$presbyter <- grepl("PRESBITER", df$ballot)
sum(df$presbyter)

# Apostle in title
df$apostle <- grepl("APOSTOL", df$ballot)
sum(df$apostle)

# Padre in title
df$padre <- grepl("PADRE", df$ballot)
sum(df$padre)

# Frei in title (pattern includes the trailing space)
df$frei <- grepl("FREI ", df$ballot)
sum(df$frei)

# Generate any Prot title
# TRUE when any of the six Protestant title dummies is set.
df$title <- df$prophet == 1 | df$pastor == 1 | df$bishop == 1 |
  df$missionary == 1 | df$presbyter == 1 | df$apostle == 1
sum(df$title)

# Remove those that are clearly just names rather than Evangelical monikers - this was based on manual check of all Evangelical ballot names
# The flag is reset with the logical FALSE. Assigning the string "FALSE"
# (as was done before) silently turned the whole column into text, after
# which a comparison such as df$title == 1 is never TRUE.
name_not_title <- grepl("PASTORAL", df$ballot, fixed = TRUE) |
  grepl("PASTORI", df$ballot, fixed = TRUE) |
  grepl("PASTORE", df$ballot, fixed = TRUE)
df$title[name_not_title] <- FALSE

# Generate any Catholic title
# TRUE when either the padre or the frei dummy is set.
df$cathtitle <- df$frei == 1 | df$padre == 1
sum(df$cathtitle)

###################################
##NON-RELIGIOUS TITLES#############
###################################
# Professor
df$professor <- grepl("PROFESSOR", df$ballot)
sum(df$professor)

# Doctor
df$doutor <- grepl("DOUTOR", df$ballot)
sum(df$doutor)

# Any non-religious title
df$anynonrel <- df$doutor == 1 | df$professor == 1
sum(df$anynonrel)

# Any title
# NOTE(review): padre is included here but frei is not, although both count
# towards cathtitle - confirm that is intended.
df$anytitle <- df$title == 1 | df$professor == 1 | df$doutor == 1 | df$padre == 1
sum(df$anytitle)

###################################
### Employment Dummies ############
###################################
# 0/1 indicators built from the registered job name.
# Religious Leader: job name contains "SACER"
df$reljob <- as.numeric(grepl("SACER", df$jobname))
table(df$reljob)
# Doctor: job name is exactly "MEDICO"
df$docjob <- as.numeric(df$jobname %in% "MEDICO")
table(df$docjob)
# Professor: job name contains "PROFESSOR"
df$profjob <- as.numeric(grepl("PROFESSOR", df$jobname))
table(df$profjob)

###############################################################################
## Generating dummy for whether running for majoritarian or proportional race #
###############################################################################
# TRUE for office codes 6, 7 and 13.
df$prop <- df$cargocode == 6 | df$cargocode == 7 | df$cargocode == 13
table(df$prop)

###############################################################################
## Generating dummy for whether municipal race, state race, national race #####
###############################################################################
# Every office code not listed below falls into "STATE".
df$level <- rep("STATE", length(df$cargo))
df$level[df$cargocode %in% c(11, 12, 13)] <- "MUNICIPAL"
df$level[df$cargocode %in% c(1, 2)] <- "NATIONAL"
df$level[df$cargocode %in% 91] <- "NONE"

##############################
## Only keep municipal races #
##############################
# Row count shown before and after the filter.
length(df$level)
is_municipal <- df$level == "MUNICIPAL"
df <- df[is_municipal, ]
length(df$level)

##############################################################################
## Remove candidates in second round since this would cause double counting ##
##############################################################################
# NOTE(review): a missing round value yields an all-NA row with this kind of
# logical subsetting - confirm round has no NAs.
is_first_round <- df$round == 1
df <- df[is_first_round, ]

###############################################
### Unique candidate identifiers ##############
###############################################
# One numeric id per distinct name / birthday / birth municipality /
# birth state combination.
candidate_key <- paste(df$name, df$birthday, df$birthmuni, df$birthstate, sep = "_")
df$id <- as.numeric(as.factor(candidate_key))


#################################################
## Count the number of times the candidate runs #
#################################################
# numbruns = number of rows in df that share the candidate's id.
# Computed in one grouped pass instead of rescanning the whole data frame once
# per candidate, which is quadratic in the number of rows.
ids <- unique(df$id)  # kept because later sections of the script reuse `ids`
df$numbruns <- as.numeric(ave(seq_along(df$id), df$id, FUN = length))

#############################
# Code if ran multiple times #
#############################
# 1 when the candidate appears more than once in df, otherwise 0
df$multipleruns <- as.numeric(df$numbruns > 1)
table(df$multipleruns)

###################################################
## Ever registered as religious leader or doctor ##
###################################################
# For religious leader
# everreljob is 1 for every row of a candidate who registered as a religious
# leader in at least one race, and 0 for candidates who never did. Because
# reljob is a 0/1 dummy, that is simply the per-candidate maximum of reljob.
# This replaces a loop over candidates that rescanned the whole data frame on
# each iteration and needed a second recode pass; the resulting 0/1 values are
# the same.
df$everreljob <- ave(df$reljob, df$id, FUN = max)
table(df$everreljob)

## For Doctors
# everdocjob is 1 for every row of a candidate who registered as a doctor
# (docjob == 1) in at least one race, and 0 otherwise.
# The previous version stored, for candidates with several runs, the NUMBER of
# distinct docjob values (1 or 2) and never recoded it the way the religious
# leader section does, so multi-run candidates who were never doctors ended up
# coded 1. Taking the per-candidate maximum of the 0/1 dummy yields the
# intended indicator directly.
# NOTE(review): if code further down the script recodes everdocjob from the
# old 1/2 scheme, that recode must be removed.
df$everdocjob <- ave(df$docjob, df$id, FUN = max)
table(df$everdocjob)


############################################
## Generating ID for every race ############
############################################
# Office codes 11 and 12 are labelled as the mayoral race, 13 as city council.
df$racetype[df$cargocode %in% c(11, 12)] <- "MAYORAL"
df$racetype[df$cargocode %in% 13] <- "CITY COUNCIL"
table(df$racetype)

# A race is a (year, lcode) pair within a race type. Ids are numbered within
# each type and shifted by the number of ids already handed out, so mayoral
# races come first and city-council races continue after them.
df$raceid <- rep(NA, length(df$year))
for (race_kind in c("MAYORAL", "CITY COUNCIL")) {
  in_kind <- df$racetype == race_kind
  # distinct values so far, minus one for the NA placeholder still present
  id_offset <- length(unique(df$raceid)) - 1
  ids_within_kind <- as.numeric(as.factor(
    with(df[in_kind, ], paste(year, lcode, sep = "_"))
  ))
  df$raceid[in_kind] <- id_offset + ids_within_kind
}

#####################################################
## Calculating number of candidates in every race ###
#####################################################
# Counts the rows sharing one raceid. Retained only in case code elsewhere in
# the script still calls it; it rescans df on every call.
function1 <- function(x) {sum(df$raceid == x, na.rm = TRUE)}

# numcandidates = number of rows in the same race, computed in one grouped
# pass rather than one full scan of df per row. Rows without a raceid get NA
# instead of a count inflated by the missing values.
df$numcandidates <- ave(seq_along(df$raceid), df$raceid, FUN = length)
df$numcandidates[is.na(df$raceid)] <- NA
table(df$numcandidates)

#############################################################################
## Calculate the time they are running ######################################
#############################################################################
# timerun is the ordinal of the election year among the distinct years in
# which the candidate appears: 1 for their first year, 2 for the second, ...
# Years are ranked chronologically within each candidate, so the result no
# longer depends on the row order of df. With df ordered by year, as the
# previous nested loop implicitly required, the values are unchanged. Rows with
# a missing year get NA. One grouped pass replaces the loop over every
# candidate/year pair.
ids <- unique(df$id)  # kept because later sections of the script may reuse it
df$timerun <- ave(
  seq_along(df$year),
  df$id,
  FUN = function(rows) {
    yrs <- df$year[rows]
    match(yrs, sort(unique(yrs)))
  }
)


##############################################################
### Unique candidate / municipality identifiers ##############
##############################################################
# One numeric id per distinct (candidate id, lcode) combination
df$id2 <- with(df, as.numeric(factor(paste(id, lcode, sep = "_"))))

# Quick inspection: first ids, and how many distinct ids each scheme produces
head(df$id2, n = 300)
summary(df$id)
length(unique(df$id))
length(unique(df$id2))

####################################################
## Calculate both variable for same municipality ###
####################################################
# both2 is 1 when the same candidate, within the same lcode (id2), appears
# under more than one value of prop, i.e. ran under different rules, and 0
# otherwise. That covers candidates who ran once and candidates who always ran
# under the same rules. A single grouped pass replaces the loop that rescanned
# df once per id2.
df$both2 <- as.numeric(
  ave(
    as.numeric(df$prop),
    df$id2,
    FUN = function(rules) length(unique(rules))
  ) > 1
)

# Write to CSV
# (disabled) optional checkpoint of the candidate data built so far
#write.csv(df,"df_intermediary_new.csv")

# Loading it back in for check
# The working directory is reset to the project data folder; the reload itself
# is disabled, so the in-memory df continues to be used below.
setwd("//adsroot.itcs.umich.edu/home/hursre/Documents/Papers/Evangelical Signalling/Data")
#df<-read.csv("df_intermediary_new.csv")


################################################################################################
# Need to make some changes to municipality names so they will merge later on with census data #
################################################################################################
# Each row is: name as it appears in df, state, replacement name.
# Rows are applied one after another, top to bottom, and only touch records
# whose municipality AND state both match.
muni_fixes <- matrix(c(
  "ASSU", "RN", "ACU",
  "ALTA FLORESTA DO OESTE", "RO", "ALTA FLORESTA D'OESTE",
  "ALVORADA DO OESTE", "RO", "ALVORADA D'OESTE",
  "AUGUSTO SEVERO", "RN", "CAMPO GRANDE",
  "FLORINEA", "SP", "FLORINIA",
  "GRACCHO CARDOSO", "SE", "GRACHO CARDOSO",
  "LAGOA DE ITAENGA", "PE", "LAGOA DO ITAENGA",
  "MACHADINHO DO OESTE", "BA", "MACHADINHO D'OESTE",
  "MUQUEM DO SAO FRANCISCO", "BA", "MUQUEM DE SAO FRANCISCO",
  "NOVA BRASILANDIA DO OESTE", "RO", "NOVA BRASILANDIA D'OESTE",
  "PRESIDENTE CASTELO BRANCO", "SC", "PRESIDENTE CASTELLO BRANCO",
  "SANTA LUIZA DO OESTE", "RO", "SANTA LUIZA D'OESTE",
  "SAO FELIPE DO OESTE", "RO", "SAO FELIPE D'OESTE",
  "SAO FELIPE D OESTE", "RO", "SAO FELIPE D'OESTE",
  "AGUA QUENTE", "BA", "ERICO CARDOSO",
  "ALMERIM", "PA", "ALMEIRIM",
  "ALTO DA BOA VISTA", "MT", "ALTO BOA VISTA",
  "AMPARO DO SAO FRANCISCO", "SE", "AMPARO DE SAO FRANCISCO",
  "APARECIDA D OESTE", "SP", "APARECIDA D'OESTE",
  "APARECIDA DO TABUADO", "MS", "APARECIDA DO TABOADO",
  "ARMACAO DE BUZIOS", "RJ", "ARMACAO DOS BUZIOS",
  "BALNEARIO DE BARRA DO SUL", "SC", "BALNEARIO BARRA DO SUL",
  "BALNEARIO DE CAMBORIU", "SC", "BALNEARIO CAMBORIU",
  "BATAGUACU", "MS", "BATAGUASSU",
  "BATAIPORA", "MS", "BATAYPORA",
  "BERNADINO DE CAMPOS", "SP", "BERNARDINO DE CAMPOS",
  "BOA SAUDE", "RN", "JANUARIO CICCO",
  "BOA VISTA DE RAMOS", "AM", "BOA VISTA DO RAMOS",
  "CANINDE DO SAO FRANCISCO", "SE", "CANINDE DE SAO FRANCISCO",
  "CHAPADA DA AREIA", "TO", "CHAPADA DE AREIA",
  "CHIAPETA", "RS", "CHIAPETTA",
  "CONCEICAO DA PEDRA", "MG", "CONCEICAO DAS PEDRAS",
  "CONSELHEIRO MAYRINCK", "PR", "CONSELHEIRO MAIRINCK",
  "DEP IRAPUAN PINHEIRO", "CE", "DEPUTADO IRAPUAN PINHEIRO",
  "DIAMANTE DO OESTE", "PR", "DIAMANTE D'OESTE",
  "DIAS D AVILA", "BA", "DIAS D'AVILA",
  "ELDORADO DO CARAJAS", "PA", "ELDORADO DOS CARAJAS",
  "ESPIGAO DO OESTE", "RO", "ESPIGAO D'OESTE",
  "ESPIRITO SANTO DO OESTE", "RN", "ESPIRITO SANTO",
  "ESTRELA D OESTE", "SP", "ESTRELA D'OESTE",
  "EUZEBIO", "CE", "EUSEBIO",
  "FERNANDO PEDROSA", "RN", "FERNANDO PEDROZA",
  "FIGUEIROPOLES D'OESTE", "MT", "FIGUEIROPOLIS D'OESTE",
  "GOUVEA", "MG", "GOUVEIA",
  "GOVERNADOR EDSON LOBAO", "MA", "GOVERNADOR EDISON LOBAO",
  "GOVERNADOR LOMANTO JUNIOR", "BA", "BARRO PRETO",
  "GRANGEIRO", "CE", "GRANJEIRO",
  "GUARANI D OESTE", "SP", "GUARANI D'OESTE",
  "HERVAL DO OESTE", "SC", "HERVAL D'OESTE",
  "IPAUCU", "SP", "IPAUSSU",
  "ITABIRINHA DE MANTENA", "MO", "ITABIRINHA",
  "ITAGUAGE", "PR", "ITAGUAJE",
  "ITAMOJI", "MG", "ITAMOGI",
  "ITAPEJARA DO OESTE", "PR", "ITAPEJARA D'OESTE",
  "ITAPORANGA D AJUDA", "SE", "ITAPORANGA D'AJUDA",
  "JUTY", "MS", "JUTI",
  "LAGEADO", "TO", "LAJEADO",
  "LAGEADO GRANDE", "SC", "LAJEADO GRANDE",
  "LAGOA D ANTA", "RN", "LAGOA D'ANTA",
  "LUIS ALVES", "SC", "LUIZ ALVES",
  "LUIS DOMINGUES DO MARANHAO", "MA", "LUIS DOMINGUES",
  "LUISIANIA", "PR", "LUIZIANA",
  "LUISIANIA", "SP", "LUIZIANIA",
  "MAE D AGUA", "PB", "MAE D'AGUA",
  "MANUEL URBANO", "AC", "MANOEL URBANO",
  "MOGI MIRIM", "SP", "MOJI MIRIM",
  "MOJI GUACU", "SP", "MOGI GUACU",
  "MOREIRA SALLES", "PR", "MOREIRA SALES",
  "MUNDO NOVO DE GOIAS", "GO", "MUNDO NOVO",
  "NOVO AYRAO", "AM", "NOVO AIRAO",
  "OLHO D AGUA", "PB", "OLHO D'AGUA",
  "OLHO D AGUA DAS CUNHAS", "MA", "OLHO D'AGUA DAS CUNHAS",
  "OLHO D AGUA DAS FLORES", "AL", "OLHO D'AGUA DAS FLORES",
  "OLHO D AGUA DO CASADO", "AL", "OLHO D'AGUA DO CASADO",
  "OLHO D AGUA GRANDE", "AL", "OLHO D'AGUA GRANDE",
  "OLHO D AGUA DO PIAUI", "PI", "OLHO D'AGUA DO PIAUI",
  "OLHO DAGUA DO PIAUI", "PI", "OLHO D'AGUA DO PIAUI",
  "OLHOS DAGUA", "MG", "OLHOS D'AGUA",
  "OLIVEIRA DO TOCANTINS", "TO", "OLIVEIRA DE FATIMA",
  "PALMEIRA D OESTE", "SP", "PALMEIRA D'OESTE",
  "PATI DO ALFERES", "RJ", "PATY DO ALFERES",
  "PAU DARCO DO PIAUI", "PI", "PAU D'ARCO DO PIAUI",
  "PEROLA D OESTE", "PR", "PEROLA D'OESTE",
  "PINDORAMA DE GOIAS", "TO", "PINDORAMA DO TOCANTINS",
  "PINGO DAGUA", "MG", "PINGO D'AGUA",
  "PIRACUNUNGA", "SP", "PIRASSUNUNGA",
  "PORTO ESPEREDIAO", "MT", "PORTO ESPERIDIAO",
  "QUINJINGUE", "BA", "QUIJINGUE",
  "SALMORAO", "SP", "SALMOURAO",
  "SANTA ANA DO ITARARE", "PR", "SANTANA DO ITARARE",
  "SANTA BARBARA D OESTE", "SP", "SANTA BARBARA D'OESTE",
  "SANTA CECILIA DE UMBUZEIRO", "PB", "UMBUZEIRO",
  "SANTA CLARA D OESTE", "SP", "SANTA CLARA D'OESTE",
  "SANTA CRUZ DO MONTE CASTELO", "PR", "SANTA CRUZ DE MONTE CASTELO",
  "SANTA IZABEL DO IVAI", "PR", "SANTA ISABEL DO IVAI",
  "SANTA LUZIA DO OESTE", "RO", "SANTA LUZIA D'OESTE",
  "SANTA MARIA DO JETIBA", "ES", "SANTA MARIA DE JETIBA",
  "SANTA RITA DO OESTE", "SP", "SANTA RITA D'OESTE",
  "SANTA RITA D OESTE", "SP", "SANTA RITA D'OESTE",
  "SANTA RITA DO IBITIPOCA", "MG", "SANTA RITA DE IBITIPOCA",
  "SANTAREM", "PB", "JOCA CLAUDINO",
  "SANTO ANTONIO DA POSSE", "SP", "SANTO ANTONIO DE POSSE",
  "SAO CAETANO", "PE", "SAO CAITANO",
  "SAO JOAO D ALIANCA", "GO", "SAO JOAO D'ALIANCA",
  "SAO JOAO DO PAU D ALHO", "SP", "SAO JOAO DO PAU D'ALHO",
  "SAO JORGE D OESTE", "PR", "SAO JORGE D'OESTE",
  "SAO JOSE DE CAMPESTRE", "RN", "SAO JOSE DO CAMPESTRE",
  "SAO JOSE DO BREJO CRUZ", "PB", "SAO JOSE DO BREJO DO CRUZ",
  "SAO LUIS GONZAGA", "RS", "SAO LUIZ GONZAGA",
  "SAO LUIZ DO ANAUA", "RR", "SAO LUIZ",
  "SAO MIGUEL DE TOUROS", "RN", "SAO MIGUEL DO GOSTOSO",
  "SAO RAIMUNDO DA DOCA BEZERRA", "MA", "SAO RAIMUNDO DO DOCA BEZERRA",
  "SAO SEB. DE LAGOA DE ROCA", "PB", "SAO SEBASTIAO DE LAGOA DE ROCA",
  "SAO TOME DAS LETRAS", "MG", "SAO THOME DAS LETRAS",
  "SAO VALERIO DO TOCANTINS", "TO", "SAO VALERIO",
  "SENADOR CATUNDA", "CE", "CATUNDA",
  "SENADOR LA ROQUE", "MA", "SENADOR LA ROCQUE",
  "SENADOR TEOTONIO VILELA", "AL", "TEOTONIO VILELA",
  "SITIO D ABADIA", "GO", "SITIO D'ABADIA",
  "SUD MENUCCI", "SP", "SUD MENNUCCI",
  "SUZANOPOLIS", "SP", "SUZANAPOLIS",
  "TANQUE D ARCA", "AL", "TANQUE D'ARCA",
  "TEJUSSUOCA", "CE", "TEJUCUOCA",
  "VALPARAIZO", "SP", "VALPARAISO",
  "VARRE E SAI", "RJ", "VARRE SAI",
  "VILA ALTA", "PR", "ALTO PARAISO",
  "VILA BELA STSSMA TRINDADE", "MT", "VILA BELA DA SANTISSIMA TRINDADE",
  "VIZEU", "PA", "VISEU",
  "OURO BRANCO", "BA", "OUROLANDIA",
  "CAMPO DE SANTANA", "PB", "TACIMA",
  "DARCYNOPOLIS", "TO", "DARCINOPOLIS",
  "MUNHOZ DE MELLO", "PR", "MUNHOZ DE MELO"
), ncol = 3, byrow = TRUE)

for (fix_row in seq_len(nrow(muni_fixes))) {
  fix_hit <- df$municipality == muni_fixes[fix_row, 1] &
    df$state == muni_fixes[fix_row, 2]
  df$municipality[fix_hit] <- muni_fixes[fix_row, 3]
}
rm(muni_fixes, fix_row, fix_hit)

# Replace - with space (every hyphen in every name)
df$municipality <- gsub("-", " ", df$municipality, fixed = TRUE)

# Second round of corrections, applied after the hyphen replacement.
# Each row is: name as it appears in df, state, replacement name; rows are
# applied in order and only where both municipality and state match.
# Several rows switch a spelling back from what the earlier corrections
# produced (e.g. FLORINIA -> FLORINEA, CAMPO GRANDE -> AUGUSTO SEVERO).
# "NAO ME-TOQUE" deliberately keeps its second hyphen: the IBGE name cleanup
# later in this script replaces only the first hyphen of each name.
muni_fixes_2 <- matrix(c(
  "AGUA BRANCA DO AMAPARI", "AP", "PEDRA BRANCA DO AMAPARI",
  "AMPARO DE SAO FRANCISCO", "SE", "AMPARO DO SAO FRANCISCO",
  "ANSELMO DA FONSECA", "BA", "CAEM",
  "BALNEARIO RINCAO", "SC", "ICARA",
  "BELEM DE SAO FRANCISCO", "PE", "BELEM DO SAO FRANCISCO",
  "BOM JESUS DO ARAGUAIAL", "MT", "BOM JESUS DO ARAGUAIA",
  "CAMPO GRANDE", "RN", "AUGUSTO SEVERO",
  "CAMPOS", "RJ", "CAMPOS DOS GOYTACAZES",
  "COUTO DE MAGALHAES", "TO", "COUTO MAGALHAES",
  "DIVINOPOLIS", "TO", "DIVINOPOLIS DO TOCANTINS",
  "ELDORADO DOS CARAJAS", "PA", "ELDORADO DO CARAJAS",
  "FLORINIA", "SP", "FLORINEA",
  "IGUARACI", "PE", "IGUARACY",
  "ITABIRINHA DE MANTENA", "MG", "ITABIRINHA",
  "ITAMARACA", "PE", "ILHA DE ITAMARACA",
  "ITAPAGE", "CE", "ITAPAJE",
  "JABOATAO", "PE", "JABOATAO DOS GUARARAPES",
  "LAGOA DO ITAENGA", "PE", "LAGOA DE ITAENGA",
  "MACHADINHO DO OESTE", "RO", "MACHADINHO D'OESTE",
  "MOJI MIRIM", "SP", "MOGI MIRIM",
  "MONTE SANTO", "TO", "MONTE SANTO DO TOCANTINS",
  "MUQUEM DE SAO FRANCISCO", "BA", "MUQUEM DO SAO FRANCISCO",
  "NAO ME TOQUE", "RS", "NAO ME-TOQUE",
  "NOVA BANDEIRANTE", "MT", "NOVA BANDEIRANTES",
  "NOVA BRASILANDIA", "RO", "NOVA BRASILANDIA D'OESTE",
  "QUATRO MARCOS", "MT", "SAO JOSE DOS QUATRO MARCOS",
  "SANTA ROSA", "AC", "SANTA ROSA DO PURUS",
  "SANTANA DO LIVRAMENTO", "RS", "SANT'ANA DO LIVRAMENTO",
  "SAO DOMINGOS", "ES", "SAO DOMINGOS DO NORTE",
  "SAO DOMINGOS DE POMBAL", "PB", "SAO DOMINGOS",
  "SAO LUIS DO PARAITINGA", "SP", "SAO LUIZ DO PARAITINGA",
  "TAPURA", "MT", "TAPURAH",
  "TRAJANO DE MORAIS", "RJ", "TRAJANO DE MORAES",
  "TRINDADE", "RS", "TRINDADE DO SUL",
  "VALPARAISO", "GO", "VALPARAISO DE GOIAS",
  "CABO", "PE", "CABO DE SANTO AGOSTINHO",
  "AGUA DOCE", "MA", "AGUA DOCE DO MARANHAO",
  "AGUAS LINDAS", "GO", "AGUAS LINDAS DE GOIAS",
  "ALAGOINHA", "PI", "ALAGOINHA DO PIAUI",
  "ALTO PARAISO", "GO", "ALTO PARAISO DE GOIAS",
  "AMAPARI", "AP", "PEDRA BRANCA DO AMAPARI",
  "BADY BASSIT", "SP", "BADY BASSITT",
  "BANDEIRANTE", "TO", "BANDEIRANTES DO TOCANTINS",
  "BARAUNAS", "PB", "AREIA DE BARAUNAS",
  "BELA VISTA", "MA", "BELA VISTA DO MARANHAO",
  "BOM JESUS", "GO", "BOM JESUS DE GOIAS",
  "BRASOPOLIS", "MG", "BRAZOPOLIS",
  "PARATI", "RJ", "PARATY",
  "PICARRAS", "SC", "BALNEARIO PICARRAS",
  "POXOREO", "MT", "POXOREU",
  "PRESIDENTE JUSCELINO", "RN", "SERRA CAIADA",
  "SANTA ISABEL DO PARA", "PA", "SANTA IZABEL DO PARA",
  "SANTA TERESINHA", "BA", "SANTA TEREZINHA",
  "SAO VALERIO DA NATIVIDADE", "TO", "SAO VALERIO",
  # Some of the candidates from EMBU DAS ARTES were labeled "EMBU". The "lcode"
  # variable was checked to ensure these were not from EMBU GUACU, also in SP.
  "EMBU", "SP", "EMBU DAS ARTES"
), ncol = 3, byrow = TRUE)

for (fix_row_2 in seq_len(nrow(muni_fixes_2))) {
  fix_hit_2 <- df$municipality == muni_fixes_2[fix_row_2, 1] &
    df$state == muni_fixes_2[fix_row_2, 2]
  df$municipality[fix_hit_2] <- muni_fixes_2[fix_row_2, 3]
}
rm(muni_fixes_2, fix_row_2, fix_hit_2)


# Set the working directory
# The IBGE csv files read below are addressed relative to this folder.
# NOTE(review): this path is relative to the user's home directory, unlike the
# network path used earlier in the script - confirm both resolve on the machine
# running this.
setwd("~/Papers/Evangelical Signalling/Data/RawData")

# Load the needed packages
library(foreign)
library(haven)
library(stringr)
library(dplyr)
library(sjlabelled)

# Read in the master data
#df<-data.frame(read_dta("dffinal.dta"))

# Read in the IBGE data (Evangelical counts and population, 2000 and 2010)
ev00 <- read.csv("ibge_evan_2000.csv")
ev10 <- read.csv("ibge_evan_2010.csv")
pop00 <- read.csv("ibge_pop_2000.csv")
pop10 <- read.csv("ibge_pop_2010.csv")

# Stack the two census years, then join Evangelical counts to population by
# municipality label and year
test1 <- data.frame(rbind(ev00, ev10))
test2 <- data.frame(rbind(pop00, pop10))
test <- merge(test1, test2, by = c("municipality", "year"))
test <- test %>% select(-code.x, -code.y)
head(test)

# Make them numeric.
# Go through as.character() first: if read.csv() returned these columns as
# factors (the default for text columns before R 4.0, e.g. when a cell holds a
# non-numeric placeholder), as.numeric() alone would return the internal level
# codes rather than the counts. Non-numeric cells become NA with a warning.
test$evan_pop <- as.numeric(as.character(test$evan_pop))
test$population <- as.numeric(as.character(test$population))
head(test$population, 100)
tail(test$population, 100)

# Create percent Evangelical (stored as a share of the population)
test$ibge_pct_evan <- test$evan_pop / test$population

# Then merge them
# Change all names so IBGE is in them (to differentiate from past IPUMS metrics)
# NOTE(review): the renaming is positional; it assumes columns 1, 3 and 4 are
# municipality, evan_pop and population at this point - confirm against the
# csv layout.
names(test)[c(1, 3, 4)] <- c("ibge_municipality", "ibge_evan_pop", "ibge_pop")

# Remove special characters
# Accented letters are mapped to their unaccented upper-case equivalent.
# The accented characters are written as \u escapes so that this step does not
# depend on the encoding this script file is saved or opened in; with literal
# accented characters a wrong file encoding turns every pattern into the same
# replacement character and the substitutions stop working.
# Covered, for upper and lower case alike: A with acute/grave/circumflex/
# tilde/diaeresis, E with acute/circumflex, I with acute, O with acute/
# circumflex/tilde, U with acute/diaeresis, and C with cedilla.
# NOTE(review): assumes the names are correctly decoded when the IBGE csv files
# are read; confirm the files' encoding if accented names still show up.
test$ibge_municipality <- chartr(
  paste0(
    "\u00c1\u00c0\u00c2\u00c3\u00c4\u00c9\u00ca\u00cd",
    "\u00d3\u00d4\u00d5\u00da\u00dc\u00c7",
    "\u00e1\u00e0\u00e2\u00e3\u00e4\u00e9\u00ea\u00ed",
    "\u00f3\u00f4\u00f5\u00fa\u00fc\u00e7"
  ),
  paste0("AAAAAEEIOOOUUC", "AAAAAEEIOOOUUC"),
  test$ibge_municipality
)

# Change all to upper case
test$ibge_municipality <- toupper(test$ibge_municipality)

# Create state variable
# substrRight(x, n): the last n characters of each string in x (the whole
# string when it is shorter than n).
substrRight <- function(x, n) {
  end_pos <- nchar(x)
  start_pos <- end_pos - n + 1
  substr(x, start_pos, end_pos)
}
# The state code is taken from the last four characters of the IBGE label,
# with the first "(" and the first ")" stripped out.
test$state <- sub(
  "[)]", "",
  sub("[(]", "", substrRight(test$ibge_municipality, 4))
)

# Create municipality variable
# Everything from the first "(" onward is dropped and the result trimmed.
# Only the FIRST hyphen of each name is then turned into a space.
test$municipality <- sub(
  "-", " ",
  trimws(sub("[(].*$", "", test$ibge_municipality)),
  fixed = TRUE
)

# Create the two year divisions in the master data:
# election years 2000, 2002 and 2004 form period 1, every other year period 2
df$period <- ifelse(df$year %in% c(2000, 2002, 2004), 1, 2)
table(df$period, df$year)

# Need to do this for IBGE as well: rows for 2010 are period 2, the rest period 1
test$period <- ifelse(test$year %in% 2010, 2, 1)
table(test$period, test$year)

# Remove year variable (period now carries the information), then inspect
test <- select(test, -year)
colnames(test)
test

# # Make state into character - things went funky when I tried to automate
# df$state_new <- rep(NA,length(df$state))
# names <- get_labels(df$state)[-7]
# states <- unique(df$state)
# for (i in 1:length(states)){
#   df$state_new[df$state==states[i]] <- names[i]
# }
# 
# df$state_new[df$municipality=="CASTELO"]
# table(df$state)
# table(df$state_new)
# names
# # Redefine state
# df$state<-df$state_new

# Make sure they are characters
test$municipality <- as.character(test$municipality)
df$municipality <- as.character(df$municipality)

# Merge in with master data.
# Left join: every candidate row is kept, and IBGE columns are NA where no
# (municipality, state, period) match exists. Note that merge() returns the
# rows sorted by the key columns, not in the original order of df.
temp <- merge(df, test, by = c("municipality", "state", "period"), all.x = TRUE)

# A left join should not change the number of candidate rows; more rows means
# the IBGE table holds duplicate (municipality, state, period) keys.
if (nrow(temp) != nrow(df)) {
  warning(
    "Merging IBGE data changed the number of rows from ", nrow(df),
    " to ", nrow(temp), "; check for duplicated municipality/state/period keys"
  )
}


##################################################################################
# For instances where the municipality didn't exist in 2000 I used the 2010 data #
##################################################################################
#################################################
# Doing this for the Evangelical share variable #
#################################################
# One row per municipality (name, state). For each, the period-1 rows of
# ibge_pct_evan are overwritten with the distinct value(s) found on that
# municipality's period-2 rows. JACUIZINHO used to be listed twice; repeating
# the same copy changes nothing, so it appears once here.
late_munis <- matrix(c(
  "ACEGUA", "RS",
  "ALMIRANTE TAMANDARE DO SUL", "RS",
  "AROEIRAS DO ITAIM", "PI",
  "ARROIO DO PADRE", "RS",
  "BARROCAS", "BA",
  "BOA VISTA DO CADEADO", "RS",
  "BOA VISTA DO INCRA", "RS",
  "BOA VISTA DO SUL", "RS",
  "BOM JESUS DO ARAGUAIA", "MT",
  "BOZANO", "RS",
  "CAMPO LIMPO DE GOIAS", "GO",
  "CANUDOS DO VALE", "RS",
  "CAPAO BONITO DO SUL", "RS",
  "CAPAO DO CIPO", "RS",
  "CARRAPATEIRA", "PB",
  "COLNIZA", "MT",
  "CONQUISTA D'OESTE", "MT",
  "CORONEL PILAR", "RS",
  "COQUEIRO BAIXO", "RS",
  "CRUZALTENSE", "RS",
  "CURVELANDIA", "MT",
  "FIGUEIRAO", "MS",
  "FORQUETINHA", "RS",
  "GAMELEIRA DE GOIAS", "GO",
  "GOVERNADOR LINDENBERG", "ES",
  "IPIRANGA DE GOIAS", "GO",
  "IPIRANGA DO NORTE", "MT",
  "ITANHANGA", "MT",
  "ITATI", "RS",
  "JACUIZINHO", "RS",
  "JEQUIA DA PRAIA", "AL",
  "JUNDIA", "RN"
), ncol = 2, byrow = TRUE)

for (late_row in seq_len(nrow(late_munis))) {
  late_hit <- temp$municipality == late_munis[late_row, 1] &
    temp$state == late_munis[late_row, 2]
  temp$ibge_pct_evan[late_hit & temp$period == 1] <-
    unique(temp$ibge_pct_evan[late_hit & temp$period == 2])
}
rm(late_munis, late_row, late_hit)

# LAGOA BONITA DO SUL
temp$ibge_pct_evan[temp$municipality=="LAGOA BONITA DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="LAGOA BONITA DO SUL" & temp$state=="RS" & temp$period==2])

# LAGOA SANTA
temp$ibge_pct_evan[temp$municipality=="LAGOA SANTA" & temp$state=="GO" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="LAGOA SANTA" & temp$state=="GO" & temp$period==2])

# LUIS EDUARDO MAGALHAES
temp$ibge_pct_evan[temp$municipality=="LUIS EDUARDO MAGALHAES" & temp$state=="BA" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="LUIS EDUARDO MAGALHAES" & temp$state=="BA" & temp$period==2])

# MATO QUEIMADO
temp$ibge_pct_evan[temp$municipality=="MATO QUEIMADO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="MATO QUEIMADO" & temp$state=="RS" & temp$period==2])

# MESQUITA
temp$ibge_pct_evan[temp$municipality=="MESQUITA" & temp$state=="RJ" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="MESQUITA" & temp$state=="RJ" & temp$period==2])

# Mojui dos campos, PA was considered part of Santarem, PA for the sake of the 2000 and 2010 census, accordingly I use those figures
temp$ibge_pct_evan[temp$municipality=="MOJUI DOS CAMPOS" & temp$state=="PA" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTAREM" & temp$state=="PA" & temp$period==1])

temp$ibge_pct_evan[temp$municipality=="MOJUI DOS CAMPOS" & temp$state=="PA" & temp$period==2] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTAREM" & temp$state=="PA" & temp$period==2])

# MATO QUEIMADO
temp$ibge_pct_evan[temp$municipality=="NOVA ALVORADA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="NOVA ALVORADA" & temp$state=="RS" & temp$period==2])

# NOVO NAZAERE
temp$ibge_pct_evan[temp$municipality=="NOVA NAZARE" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="NOVA NAZARE" & temp$state=="MT" & temp$period==2])

# NOVO ROMA DO SUL
temp$ibge_pct_evan[temp$municipality=="NOVA ROMA DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="NOVA ROMA DO SUL" & temp$state=="RS" & temp$period==2])

# NOVO ROMA DO SUL
temp$ibge_pct_evan[temp$municipality=="NOVA SANTA HELENA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="NOVA SANTA HELENA" & temp$state=="MT" & temp$period==2])

# NOVO SANTO ANTONIO
temp$ibge_pct_evan[temp$municipality=="NOVO SANTO ANTONIO" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="NOVO SANTO ANTONIO" & temp$state=="MT" & temp$period==2])

# NOVO XINGU
temp$ibge_pct_evan[temp$municipality=="NOVO XINGU" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="NOVO XINGU" & temp$state=="RS" & temp$period==2])

# PAU D'ARCO DO PIAUI
temp$ibge_pct_evan[temp$municipality=="PAU D'ARCO DO PIAUI" & temp$state=="PI" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="PAU D'ARCO DO PIAUI" & temp$state=="PI" & temp$period==2])

# PAULO BENTO
temp$ibge_pct_evan[temp$municipality=="PAULO BENTO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="PAULO BENTO" & temp$state=="RS" & temp$period==2])

# PEDRAS ALTAS
temp$ibge_pct_evan[temp$municipality=="PEDRAS ALTAS" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="PEDRAS ALTAS" & temp$state=="RS" & temp$period==2])

# PINHAL DA SERRA
temp$ibge_pct_evan[temp$municipality=="PINHAL DA SERRA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="PINHAL DA SERRA" & temp$state=="RS" & temp$period==2])

# PROTASIO ALVES
temp$ibge_pct_evan[temp$municipality=="PROTASIO ALVES" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="PROTASIO ALVES" & temp$state=="RS" & temp$period==2])

#  QUATRO IRMAOS
temp$ibge_pct_evan[temp$municipality=="QUATRO IRMAOS" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="QUATRO IRMAOS" & temp$state=="RS" & temp$period==2])

#  QUELUZITO
temp$ibge_pct_evan[temp$municipality=="QUELUZITO" & temp$state=="MG" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="QUELUZITO" & temp$state=="MG" & temp$period==2])

#  RELVADO
temp$ibge_pct_evan[temp$municipality=="RELVADO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="RELVADO" & temp$state=="RS" & temp$period==2])

#  ROLADOR
temp$ibge_pct_evan[temp$municipality=="ROLADOR" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="ROLADOR" & temp$state=="RS" & temp$period==2])

#  ROLADOR
temp$ibge_pct_evan[temp$municipality=="RONDOLANDIA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="RONDOLANDIA" & temp$state=="MT" & temp$period==2])

#  SANTA CECILIA DO SUL
temp$ibge_pct_evan[temp$municipality=="SANTA CECILIA DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTA CECILIA DO SUL" & temp$state=="RS" & temp$period==2])

#  SANTA CRUZ DO XINGU
temp$ibge_pct_evan[temp$municipality=="SANTA CRUZ DO XINGU" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTA CRUZ DO XINGU" & temp$state=="MT" & temp$period==2])

#  SANTA MARGARIDA DO SUL
temp$ibge_pct_evan[temp$municipality=="SANTA MARGARIDA DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTA MARGARIDA DO SUL" & temp$state=="RS" & temp$period==2])

#  SANTA RITA DO TRIVELATO
temp$ibge_pct_evan[temp$municipality=="SANTA RITA DO TRIVELATO" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTA RITA DO TRIVELATO" & temp$state=="MT" & temp$period==2])

#  SANTO ANTONIO DO LESTE
temp$ibge_pct_evan[temp$municipality=="SANTO ANTONIO DO LESTE" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTO ANTONIO DO LESTE" & temp$state=="MT" & temp$period==2])

#  SANTO ANTONIO DO PALMA
temp$ibge_pct_evan[temp$municipality=="SANTO ANTONIO DO PALMA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SANTO ANTONIO DO PALMA" & temp$state=="RS" & temp$period==2])

#  SAO JORGE
temp$ibge_pct_evan[temp$municipality=="SAO JORGE" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SAO JORGE" & temp$state=="RS" & temp$period==2])

#  SAO JOSE DO SUL
temp$ibge_pct_evan[temp$municipality=="SAO JOSE DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SAO JOSE DO SUL" & temp$state=="RS" & temp$period==2])

#  SAO PEDRO DAS MISSOES
temp$ibge_pct_evan[temp$municipality=="SAO PEDRO DAS MISSOES" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SAO PEDRO DAS MISSOES" & temp$state=="RS" & temp$period==2])

#  SAO PEDRO DAS MISSOES
temp$ibge_pct_evan[temp$municipality=="SERRA NOVA DOURADA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="SERRA NOVA DOURADA" & temp$state=="MT" & temp$period==2])

#  TIO HUGO
temp$ibge_pct_evan[temp$municipality=="TIO HUGO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="TIO HUGO" & temp$state=="RS" & temp$period==2])

#  UNIAO DA SERRA
temp$ibge_pct_evan[temp$municipality=="UNIAO DA SERRA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="UNIAO DA SERRA" & temp$state=="RS" & temp$period==2])

#  VALE DE SAO DOMINGOS 
temp$ibge_pct_evan[temp$municipality=="VALE DE SAO DOMINGOS" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="VALE DE SAO DOMINGOS" & temp$state=="MT" & temp$period==2])

# VESPASIANO CORREA
temp$ibge_pct_evan[temp$municipality=="VESPASIANO CORREA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="VESPASIANO CORREA" & temp$state=="RS" & temp$period==2])

# WESTFALIA
temp$ibge_pct_evan[temp$municipality=="WESTFALIA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="WESTFALIA" & temp$state=="RS" & temp$period==2])


# A few others that just gave previous name
# NOVO XINGU
temp$ibge_pct_evan[temp$municipality=="PARAISO DAS AGUAS" & temp$state=="MS" & temp$period==2] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="CHAPADAO DO SUL" & temp$state=="MS" & temp$period==2])
# PESCARIA BRAVA
temp$ibge_pct_evan[temp$municipality=="PESCARIA BRAVA" & temp$state=="SC" & temp$period==2] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="LAGUNA" & temp$state=="SC" & temp$period==2])
# PINTO BANDEIRA
temp$ibge_pct_evan[temp$municipality=="PINTO BANDEIRA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="BENTO GONCALVES" & temp$state=="RS" & temp$period==1])
temp$ibge_pct_evan[temp$municipality=="PINTO BANDEIRA" & temp$state=="RS" & temp$period==2] <- 
  unique(temp$ibge_pct_evan[temp$municipality=="BENTO GONCALVES" & temp$state=="RS" & temp$period==2])

# Making sure we have reliigous demographic stuff for every candidate - this object should give no results
unique(temp$municipality[is.na(temp$ibge_pct_evan)])

#################################################
# Doing this for the population variable #
#################################################
# ACEGUA
temp$ibge_pop[temp$municipality=="ACEGUA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="ACEGUA" & temp$state=="RS" & temp$period==2])

# ALMIRANTE TAMANDARE DO SUL
temp$ibge_pop[temp$municipality=="ALMIRANTE TAMANDARE DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="ALMIRANTE TAMANDARE DO SUL" & temp$state=="RS" & temp$period==2])

# AROEIRA DO ITAIM
temp$ibge_pop[temp$municipality=="AROEIRAS DO ITAIM" & temp$state=="PI" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="AROEIRAS DO ITAIM" & temp$state=="PI" & temp$period==2])

# ARROIO DO PADRE
temp$ibge_pop[temp$municipality=="ARROIO DO PADRE" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="ARROIO DO PADRE" & temp$state=="RS" & temp$period==2])

# BARROCAS
temp$ibge_pop[temp$municipality=="BARROCAS" & temp$state=="BA" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="BARROCAS" & temp$state=="BA" & temp$period==2])

# BOA VISTA DO CADEADO
temp$ibge_pop[temp$municipality=="BOA VISTA DO CADEADO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="BOA VISTA DO CADEADO" & temp$state=="RS" & temp$period==2])

# BOA VISTA DO INCRA
temp$ibge_pop[temp$municipality=="BOA VISTA DO INCRA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="BOA VISTA DO INCRA" & temp$state=="RS" & temp$period==2])

# BOM JESUS DO ARAGUAIA
temp$ibge_pop[temp$municipality=="BOM JESUS DO ARAGUAIA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="BOM JESUS DO ARAGUAIA" & temp$state=="MT" & temp$period==2])

# BOZANO
temp$ibge_pop[temp$municipality=="BOZANO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="BOZANO" & temp$state=="RS" & temp$period==2])

# CAMPO LIMPO DE GOIAS
temp$ibge_pop[temp$municipality=="CAMPO LIMPO DE GOIAS" & temp$state=="GO" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CAMPO LIMPO DE GOIAS" & temp$state=="GO" & temp$period==2])

# CANUDOS DO VALE
temp$ibge_pop[temp$municipality=="CANUDOS DO VALE" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CANUDOS DO VALE" & temp$state=="RS" & temp$period==2])

# CAPAO BONITO DO SUL
temp$ibge_pop[temp$municipality=="CAPAO BONITO DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CAPAO BONITO DO SUL" & temp$state=="RS" & temp$period==2])

# CAPAO DO CIPO
temp$ibge_pop[temp$municipality=="CAPAO DO CIPO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CAPAO DO CIPO" & temp$state=="RS" & temp$period==2])

# COLNIZA
temp$ibge_pop[temp$municipality=="COLNIZA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="COLNIZA" & temp$state=="MT" & temp$period==2])

# CONQUISTA D'OESTE
temp$ibge_pop[temp$municipality=="CONQUISTA D'OESTE" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CONQUISTA D'OESTE" & temp$state=="MT" & temp$period==2])

# CORONEL PILAR
temp$ibge_pop[temp$municipality=="CORONEL PILAR" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CORONEL PILAR" & temp$state=="RS" & temp$period==2])

# COQUEIRO BAIXO
temp$ibge_pop[temp$municipality=="COQUEIRO BAIXO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="COQUEIRO BAIXO" & temp$state=="RS" & temp$period==2])

# CRUZALTENSE
temp$ibge_pop[temp$municipality=="CRUZALTENSE" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CRUZALTENSE" & temp$state=="RS" & temp$period==2])

# CURVELANDIA
temp$ibge_pop[temp$municipality=="CURVELANDIA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="CURVELANDIA" & temp$state=="MT" & temp$period==2])

# FIGUEIRAO
temp$ibge_pop[temp$municipality=="FIGUEIRAO" & temp$state=="MS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="FIGUEIRAO" & temp$state=="MS" & temp$period==2])

# FORQUETINHA
temp$ibge_pop[temp$municipality=="FORQUETINHA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="FORQUETINHA" & temp$state=="RS" & temp$period==2])

# GAMELEIRA DE GOIAS
temp$ibge_pop[temp$municipality=="GAMELEIRA DE GOIAS" & temp$state=="GO" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="GAMELEIRA DE GOIAS" & temp$state=="GO" & temp$period==2])

# GOVERNADOR LINDENBERG
temp$ibge_pop[temp$municipality=="GOVERNADOR LINDENBERG" & temp$state=="ES" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="GOVERNADOR LINDENBERG" & temp$state=="ES" & temp$period==2])

# IPIRANGA DE GOIAS
temp$ibge_pop[temp$municipality=="IPIRANGA DE GOIAS" & temp$state=="GO" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="IPIRANGA DE GOIAS" & temp$state=="GO" & temp$period==2])

# IPIRANGA DO NORTE
temp$ibge_pop[temp$municipality=="IPIRANGA DO NORTE" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="IPIRANGA DO NORTE" & temp$state=="MT" & temp$period==2])

# ITANHANGA
temp$ibge_pop[temp$municipality=="ITANHANGA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="ITANHANGA" & temp$state=="MT" & temp$period==2])

# ITATI
temp$ibge_pop[temp$municipality=="ITATI" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="ITATI" & temp$state=="RS" & temp$period==2])

# JACUIZINHO
temp$ibge_pop[temp$municipality=="JACUIZINHO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="JACUIZINHO" & temp$state=="RS" & temp$period==2])

# JEQUIA DA PRAIA
temp$ibge_pop[temp$municipality=="JEQUIA DA PRAIA" & temp$state=="AL" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="JEQUIA DA PRAIA" & temp$state=="AL" & temp$period==2])

# JUNDIA
temp$ibge_pop[temp$municipality=="JUNDIA" & temp$state=="RN" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="JUNDIA" & temp$state=="RN" & temp$period==2])

# LAGOA BONITA DO SUL
temp$ibge_pop[temp$municipality=="LAGOA BONITA DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="LAGOA BONITA DO SUL" & temp$state=="RS" & temp$period==2])

# LAGOA SANTA
temp$ibge_pop[temp$municipality=="LAGOA SANTA" & temp$state=="GO" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="LAGOA SANTA" & temp$state=="GO" & temp$period==2])

# LUIS EDUARDO MAGALHAES
temp$ibge_pop[temp$municipality=="LUIS EDUARDO MAGALHAES" & temp$state=="BA" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="LUIS EDUARDO MAGALHAES" & temp$state=="BA" & temp$period==2])

# MATO QUEIMADO
temp$ibge_pop[temp$municipality=="MATO QUEIMADO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="MATO QUEIMADO" & temp$state=="RS" & temp$period==2])

# MESQUITA
temp$ibge_pop[temp$municipality=="MESQUITA" & temp$state=="RJ" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="MESQUITA" & temp$state=="RJ" & temp$period==2])

# Mojui dos campos, PA was considered part of Santarem, PA for the sake of the 2000 and 2010 census, accordingly I use those figures
temp$ibge_pop[temp$municipality=="MOJUI DOS CAMPOS" & temp$state=="PA" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SANTAREM" & temp$state=="PA" & temp$period==1])

temp$ibge_pop[temp$municipality=="MOJUI DOS CAMPOS" & temp$state=="PA" & temp$period==2] <- 
  unique(temp$ibge_pop[temp$municipality=="SANTAREM" & temp$state=="PA" & temp$period==2])

# NOVO NAZAERE
temp$ibge_pop[temp$municipality=="NOVA NAZARE" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="NOVA NAZARE" & temp$state=="MT" & temp$period==2])

# NOVO SANTA HELENA
temp$ibge_pop[temp$municipality=="NOVA SANTA HELENA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="NOVA SANTA HELENA" & temp$state=="MT" & temp$period==2])

# NOVO SANTO ANTONIO
temp$ibge_pop[temp$municipality=="NOVO SANTO ANTONIO" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="NOVO SANTO ANTONIO" & temp$state=="MT" & temp$period==2])

# NOVO XINGU
temp$ibge_pop[temp$municipality=="NOVO XINGU" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="NOVO XINGU" & temp$state=="RS" & temp$period==2])

# PAU D'ARCO DO PIAUI
temp$ibge_pop[temp$municipality=="PAU D'ARCO DO PIAUI" & temp$state=="PI" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="PAU D'ARCO DO PIAUI" & temp$state=="PI" & temp$period==2])

# PAULO BENTO
temp$ibge_pop[temp$municipality=="PAULO BENTO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="PAULO BENTO" & temp$state=="RS" & temp$period==2])

# PEDRAS ALTAS
temp$ibge_pop[temp$municipality=="PEDRAS ALTAS" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="PEDRAS ALTAS" & temp$state=="RS" & temp$period==2])

# PINHAL DA SERRA
temp$ibge_pop[temp$municipality=="PINHAL DA SERRA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="PINHAL DA SERRA" & temp$state=="RS" & temp$period==2])

# PROTASIO ALVES
temp$ibge_pop[temp$municipality=="PROTASIO ALVES" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="PROTASIO ALVES" & temp$state=="RS" & temp$period==2])

#  QUATRO IRMAOS
temp$ibge_pop[temp$municipality=="QUATRO IRMAOS" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="QUATRO IRMAOS" & temp$state=="RS" & temp$period==2])
#  ROLADOR
temp$ibge_pop[temp$municipality=="ROLADOR" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="ROLADOR" & temp$state=="RS" & temp$period==2])

#  RONDOLANDIA
temp$ibge_pop[temp$municipality=="RONDOLANDIA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="RONDOLANDIA" & temp$state=="MT" & temp$period==2])

#  SANTA CECILIA DO SUL
temp$ibge_pop[temp$municipality=="SANTA CECILIA DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SANTA CECILIA DO SUL" & temp$state=="RS" & temp$period==2])

#  SANTA CRUZ DO XINGU
temp$ibge_pop[temp$municipality=="SANTA CRUZ DO XINGU" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SANTA CRUZ DO XINGU" & temp$state=="MT" & temp$period==2])

#  SANTA MARGARIDA DO SUL
temp$ibge_pop[temp$municipality=="SANTA MARGARIDA DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SANTA MARGARIDA DO SUL" & temp$state=="RS" & temp$period==2])

#  SANTA RITA DO TRIVELATO
temp$ibge_pop[temp$municipality=="SANTA RITA DO TRIVELATO" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SANTA RITA DO TRIVELATO" & temp$state=="MT" & temp$period==2])

#  SANTO ANTONIO DO LESTE
temp$ibge_pop[temp$municipality=="SANTO ANTONIO DO LESTE" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SANTO ANTONIO DO LESTE" & temp$state=="MT" & temp$period==2])

#  SAO JOSE DO SUL
temp$ibge_pop[temp$municipality=="SAO JOSE DO SUL" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SAO JOSE DO SUL" & temp$state=="RS" & temp$period==2])

#  SAO PEDRO DAS MISSOES
temp$ibge_pop[temp$municipality=="SAO PEDRO DAS MISSOES" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SAO PEDRO DAS MISSOES" & temp$state=="RS" & temp$period==2])

#  SERRA NOVA DOURADA 
temp$ibge_pop[temp$municipality=="SERRA NOVA DOURADA" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="SERRA NOVA DOURADA" & temp$state=="MT" & temp$period==2])

#  TIO HUGO
temp$ibge_pop[temp$municipality=="TIO HUGO" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="TIO HUGO" & temp$state=="RS" & temp$period==2])
#  VALE DE SAO DOMINGOS 
temp$ibge_pop[temp$municipality=="VALE DE SAO DOMINGOS" & temp$state=="MT" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="VALE DE SAO DOMINGOS" & temp$state=="MT" & temp$period==2])

# WESTFALIA
temp$ibge_pop[temp$municipality=="WESTFALIA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="WESTFALIA" & temp$state=="RS" & temp$period==2])

# A few others just gave previous name
# PARAISO DAS AGUAS
temp$ibge_pop[temp$municipality=="PARAISO DAS AGUAS" & temp$state=="MS" & temp$period==2] <- 
  unique(temp$ibge_pop[temp$municipality=="CHAPADAO DO SUL" & temp$state=="MS" & temp$period==2])
# PESCARIA BRAVA
temp$ibge_pop[temp$municipality=="PESCARIA BRAVA" & temp$state=="SC" & temp$period==2] <- 
  unique(temp$ibge_pop[temp$municipality=="LAGUNA" & temp$state=="SC" & temp$period==2])
# PINTO BANDEIRA
temp$ibge_pop[temp$municipality=="PINTO BANDEIRA" & temp$state=="RS" & temp$period==1] <- 
  unique(temp$ibge_pop[temp$municipality=="BENTO GONCALVES" & temp$state=="RS" & temp$period==1])
temp$ibge_pop[temp$municipality=="PINTO BANDEIRA" & temp$state=="RS" & temp$period==2] <- 
  unique(temp$ibge_pop[temp$municipality=="BENTO GONCALVES" & temp$state=="RS" & temp$period==2])

# Changing some names to be more intuitive
# (these are copies, not renames: ibge_pct_evan and ibge_pop stay in `temp`)
temp$anyevan <- temp$ibge_pct_evan
temp$population <- temp$ibge_pop

# Bringing in data showing the threshold of victory
# The working directory is switched here and not restored, so both thresh.csv
# (read) and Data_Intermediary.csv (written below) resolve inside RawData.
setwd("~/Papers/Evangelical Signalling/Data/RawData")
thresh<-read.csv("thresh.csv")
# merge() keeps only rows whose year/lcode pair appears in both tables (its
# default all = FALSE), so rows of `temp` without a match in `thresh` are
# dropped here.
# NOTE(review): confirm that dropping unmatched rows is intended.
temp <- merge(temp,thresh,by=c("year","lcode"))

# Writing it to CSV
# (row.names is left at its default, so the file starts with a row-name column)
write.csv(temp,"Data_Intermediary.csv")

# Ad-hoc lookups of individual rows, run after the file has been written.
# They only display rows when evaluated at top level; `temp` is not modified.
# Exact match on the full name:
temp[temp$name=="MACIEIERA FREIRE DE ANDRADE",]


# Literal substring matches (fixed = TRUE, so no regex), limited to prop == 0.
# NOTE(review): "DOUTROA" may be a typo for "DOUTORA", and "MACIEIERA" above
# may be a typo as well - confirm these are the intended search strings.
temp[grepl("DOUTROA",temp$ballot, fixed = TRUE) & temp$prop==0,]

temp[grepl("FREIRE DE ANDRADE",temp$name, fixed = TRUE) & temp$prop==0,]

temp[grepl("ESTEVES ROQUE",temp$name, fixed = TRUE) & temp$prop==0,]




















